# start from an empty workspace (objects whose names begin with a dot included)
rm(list = ls(all.names = TRUE))
#library(foreign)  # NOTE(review): write.dta(), called at the end of this script, is provided by the foreign package

# DATA AND PATHS
# --------------
# outputs of this script: the same data set as an R workspace file and as a Stata file
outFile <- "./generated_data/3-budget_data_with_estimated_positions.RData"
outStata <- "./generated_data/3-budget_data_with_estimated_positions.dta"

# input: estimated policy positions
inFile <- "./generated_data/2-estimated_positions.RData"

# input: budget data
budgetFile <- "./original_data/budget_shares_with_ministers.csv"


# READ AND ORGANIZE DATA
# ----------------------
# restore the objects stored in inFile; the code below relies on the
# object `data` (the estimated positions) being among them
load(inFile)

# observations without a recorded position are coded as backbenchers
data$position <- replace(data$position, is.na(data$position), "Backbenchers")

# read budget data, keeping text columns as character vectors
budget <- read.csv(budgetFile, stringsAsFactors = FALSE)

# year of the debate, taken from the debate date and stored as a number
# (assumes debate.date is in a format as.Date() parses by default -- TODO confirm)
budget$debate.year <- as.numeric(
  format(as.Date(budget$debate.date), "%Y")
)


# data checks on budget data file
# Is every member tied to exactly one party affiliation?
# Yes, with the exception of Mary Harney, who switched from PD to FF
# n1: first row per member; n2: first row per member/party combination
n1 <- budget[!duplicated(budget$memberID), ]
n2 <- budget[
  !duplicated(paste(budget$memberID, budget$party.abbrv.ministers)),
]

# number of additional member/party combinations beyond one per member
dim(n2)[1] - dim(n1)[1]

# full names that occur more than once among the member/party combinations
local({
  counts <- table(paste(n2$fname, n2$lname))
  counts[counts > 1]
})

# attach the estimated positions to the budget data by member and debate year;
# all.x = TRUE keeps budget rows that have no matching position
data <- merge(
  x = budget,
  y = data,
  by = c("memberID", "debate.year"),
  all.x = TRUE
)

# data check: names from the budget file (fname, lname) next to names from
# the speaker file (first.name, last.name), one row per member, restricted
# to members for whom the speaker-file name is present
d <- data[!duplicated(data$memberID), ]
d <- d[!is.na(d$last.name), ]
d[c("first.name", "last.name", "fname", "lname")]
# all good! Note that in budget years 2012 and 2013, we combined Finance Department with Reform and Public Expenditure department

# data check: party affiliations from the budget file (party.abbrv.ministers)
# against affiliations from the speaker file (party.abbrv)
# - Bobby Molloy (memberID==768) was a member of PD; set his affiliation accordingly
data$party.abbrv <- replace(data$party.abbrv, data$memberID == 768, "PD")

# one row per member, restricted to members with a non-missing party.abbrv
d <- data[!duplicated(data$memberID), ]
d <- d[!is.na(d$party.abbrv), ]

# TRUE when both affiliation columns agree exactly for these members
identical(d$party.abbrv, d$party.abbrv.ministers)
# all good!

# the merge left missing values in the variables below; wherever one of them
# is missing, take the value from its counterpart column

# party affiliation <- party.abbrv.ministers
data$party.abbrv <- replace(
  data$party.abbrv,
  is.na(data$party.abbrv),
  data$party.abbrv.ministers[is.na(data$party.abbrv)]
)

# last name <- lname
data$last.name <- replace(
  data$last.name,
  is.na(data$last.name),
  data$lname[is.na(data$last.name)]
)

# first name <- fname
data$first.name <- replace(
  data$first.name,
  is.na(data$first.name),
  data$fname[is.na(data$first.name)]
)

# position <- position.ministers
data$position <- replace(
  data$position,
  is.na(data$position),
  data$position.ministers[is.na(data$position)]
)

# data should now only include govt members. check 5 cases which are
# coded as backbenchers

# rows currently coded as backbenchers, kept in d for inspection
d <- data[data$position == "Backbenchers", ]

# Shane McEntee (2011/12):  Minister of State for Food, Horticulture and Food Safety from 2011 to 2012
data$position[data$memberID == 2137 & data$debate.year %in% c(2011, 2012)] <- "Minister of State"

# Dick Roche (2001): was backbencher and hence removed from the data set
# (keep every row that is not member 1003 in debate year 2001)
data <- data[data$memberID != 1003 | data$debate.year != 2001, ]

# Róisín Shortall (2011): Minister of State for Primary Care from 2011 to 2012
data$position[data$debate.year == 2011 & data$memberID == 1041] <- "Minister of State"

# number of rows still coded as backbenchers after the corrections
nrow(data[data$position == "Backbenchers", ])
# all good!

# add budget year
# a column called "budget_year", if present, is first renamed to "budget.year";
# budget.year is then set to the year after the debate year
names(data) <- replace(names(data), names(data) == "budget_year", "budget.year")
data$budget.year <- 1 + data$debate.year

# save data as an R workspace file (written before the recoding below, so
# empty strings are stored unchanged in this file)
save(data, file = outFile)

# Stata export: recode empty strings as missing values, then write the .dta file.
# write.dta() lives in the foreign package, which this script never attaches
# (the library(foreign) call at the top is commented out), so the function is
# called through its namespace; an unqualified call fails in a fresh session.
data[data == ""] <- NA
foreign::write.dta(data, outStata)
